# -*- coding: utf-8 -*-
import re
import os
import time
import json

import scrapy
from PIL import Image
from bs4 import BeautifulSoup
from scrapy.http import Request, FormRequest

class ZhihuSpider(scrapy.Spider):
    """Spider that logs in to zhihu.com with a phone number and a text captcha.

    Request flow:
        1. ``start_requests`` downloads the login captcha image.
        2. ``parser_captcha`` saves/shows the image, asks the operator to type
           the captcha, then fetches the sign-in page.
        3. ``login`` extracts the XSRF token from the sign-in page and posts
           the login form.
        4. ``check_login`` inspects the JSON reply and, on success, schedules
           the ``start_urls``.

    Credentials are never stored in source. Supply them either as spider
    arguments (``scrapy crawl zhihu -a phone_num=... -a password=...``) or
    through the ``ZHIHU_PHONE_NUM`` / ``ZHIHU_PASSWORD`` environment variables.
    """

    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['http://www.zhihu.com/']

    headers = {
        "HOST": "www.zhihu.com",
        "Referer": "https://www.zhihu.com",
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
    }

    # Login credentials; overridden by ``-a`` spider arguments, otherwise
    # resolved from the environment when ``login`` runs.
    phone_num = None
    password = None

    # File the downloaded captcha image is written to (current directory).
    captcha_path = 'captcha.gif'

    def parse(self, response):
        """Default callback for ``start_urls``; no extraction implemented."""
        pass

    def start_requests(self):
        """Start the crawl by requesting a fresh captcha image.

        Returns:
            A one-element list with the captcha request.
        """
        # Millisecond timestamp used as the ``r`` cache-busting parameter.
        timestamp = int(time.time() * 1000)
        captcha_url = (
            'https://www.zhihu.com/captcha.gif?r=%d&type=login&lang=en'
            % timestamp
        )
        return [scrapy.Request(captcha_url, headers=self.headers,
                               callback=self.parser_captcha)]

    def parser_captcha(self, response):
        """Save the captcha image, show it, and read the operator's answer.

        Args:
            response: Response whose body is the captcha image bytes.

        Returns:
            A request for the sign-in page carrying the typed captcha in
            ``meta['captcha']``.
        """
        with open(self.captcha_path, 'wb') as image_file:
            image_file.write(response.body)

        # Displaying the image is best-effort: on a headless machine the
        # operator can still open the saved file manually.
        try:
            with Image.open(self.captcha_path) as img:
                img.show()
        except Exception:
            self.logger.warning(
                "Could not display %s; open it manually to read the captcha",
                self.captcha_path, exc_info=True)

        answer = input("input the captcha with quotation mark\n>")
        # The prompt asks for quotes; drop them (and stray whitespace) so the
        # server receives only the captcha characters.
        captcha = str(answer).strip().strip('"\'')

        # NOTE: only the text captcha (captcha_type 'en') is handled here; the
        # click-the-inverted-character captcha variant is not implemented.
        return scrapy.Request(
            url='https://www.zhihu.com/#signin',
            headers=self.headers,
            callback=self.login,
            meta={'captcha': captcha},
        )

    def login(self, response):
        """Extract the XSRF token from the sign-in page and post the login form.

        Args:
            response: Sign-in page response; ``response.meta['captcha']`` holds
                the captcha text typed by the operator.

        Returns:
            A one-element list with the login ``FormRequest``, or ``None`` when
            the token or the credentials are unavailable.
        """
        xsrf = self._extract_xsrf(response)
        if not xsrf:
            self.logger.error("XSRF token not found on %s; cannot log in",
                              response.url)
            return None

        phone_num = self.phone_num or os.environ.get('ZHIHU_PHONE_NUM')
        password = self.password or os.environ.get('ZHIHU_PASSWORD')
        if not phone_num or not password:
            self.logger.error(
                "Missing credentials: pass -a phone_num=... -a password=... "
                "or set ZHIHU_PHONE_NUM / ZHIHU_PASSWORD")
            return None

        port_url = "https://www.zhihu.com/login/phone_num"
        port_data = {
            '_xsrf': xsrf,
            'phone_num': phone_num,
            'password': password,
            'captcha_type': 'en',
            "captcha": response.meta['captcha'],
        }
        return [scrapy.FormRequest(
            url=port_url,
            formdata=port_data,
            headers=self.headers,
            callback=self.check_login,
        )]

    def _extract_xsrf(self, response):
        """Return the XSRF token embedded in the page, or ``None`` if absent.

        The token is read from the JSON stored in the ``data-state`` attribute
        of ``<div id="data">`` under ``token.xsrf``.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        data_div = soup.find('div', attrs={'id': 'data'})
        if data_div is None or not data_div.has_attr('data-state'):
            return None
        try:
            state = json.loads(data_div['data-state'])
            return state['token']['xsrf']
        except (ValueError, KeyError, TypeError):
            # Malformed JSON or an unexpected structure: treat as "no token".
            return None

    def check_login(self, response):
        """Check the server's login reply and continue crawling on success.

        Args:
            response: Response to the login form post; expected to be JSON
                with a ``msg`` field.

        Yields:
            One request per entry in ``start_urls`` when login succeeded.
        """
        try:
            text_json = json.loads(response.text)
        except ValueError:
            self.logger.error("Login reply from %s is not valid JSON",
                              response.url)
            return

        # "登录成功" is the server's "login succeeded" message.
        if isinstance(text_json, dict) and text_json.get("msg") == "登录成功":
            self.logger.info("Login succeeded")
            for url in self.start_urls:
                yield scrapy.Request(url, dont_filter=True,
                                     headers=self.headers)
        else:
            self.logger.error("Login failed: %r", text_json)

